{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "fd5d82f7-e599-4dc3-bea0-3cce6a5e3308",
   "metadata": {},
   "outputs": [],
   "source": [
    "from openai import OpenAI\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "client = OpenAI(organization='org-cUyvGcri4CeMP1YPoAidnSPr')\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "ae878984-a4e4-4043-a1c2-17927ffa386f",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-1.810299, -0.17865951)"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "example = \"\"\"\\\n",
    "\"In the grand theater of modern politics, where every actor plays their part with a smile wide enough to hide the sharpened knives of ambition, \\\n",
    "promises are as fleeting as the applause, commitments as solid as the morning mist, \\\n",
    "and the common good a quaint concept trotted out for festive speeches and quickly forgotten amidst the backroom deals and power plays.\n",
    "\"\"\"\n",
    "example_tv = \"True\"\n",
    "\n",
    "sys_prompt = \"\"\"\\\n",
    "You are a helpful assistant specializing in classifying texts with respect to tone. \\\n",
    "The user inputs a piece of text which may or may not have a cynical tone. \\\n",
    "If it does have a cynical tone, you respond with \"True\"; otherwise, you respond with \"False\".\\\n",
    "\"\"\"\n",
    "\n",
    "def extract_logprobs(completion):\n",
    "    # Initialize the default values for the logprobs of 'True' and 'False'\n",
    "    true_logprob = -100\n",
    "    false_logprob = -100\n",
    "\n",
    "    # Iterate through the elements in the list\n",
    "    for element in completion.choices[0].logprobs.content[0].top_logprobs:\n",
    "        # Check if the token is 'True' and extract its logprob\n",
    "        if element.token == 'True':\n",
    "            true_logprob = element.logprob\n",
    "        # Check if the token is 'False' and extract its logprob\n",
    "        elif element.token == 'False':\n",
    "            false_logprob = element.logprob\n",
    "\n",
    "    # Return the logprobs as a tuple\n",
    "    return (true_logprob, false_logprob)\n",
    "\n",
    "def get_cynicism_score(content, example=example, example_tv=example_tv, sys_prompt=sys_prompt, verbose=False):\n",
    "    completion = client.chat.completions.create(\n",
    "        model=\"gpt-3.5-turbo\",\n",
    "        logprobs=True,\n",
    "        top_logprobs=2,\n",
    "        max_tokens=1,\n",
    "        messages=[\n",
    "            {\"role\": \"system\", \"content\": sys_prompt},\n",
    "            {\"role\": \"user\", \"content\": example},\n",
    "            {\"role\": \"assistant\", \"content\": example_tv},\n",
    "            {\"role\": \"user\", \"content\": content},\n",
    "        ]\n",
    "    )\n",
    "    \n",
    "    logprobs = extract_logprobs(completion)\n",
    "    if verbose:\n",
    "        tf = ['True', 'False']\n",
    "        print(tf[np.argmax(logprobs)])\n",
    "        print('Cynicism score:', np.exp(logprobs[0]) - np.exp(logprobs[1]))\n",
    "    return logprobs\n",
    "\n",
    "content = \"\"\"The history of the pandemic response is clear. \\\n",
    "The vaccines were produced to aid in furthering global health at the expense of globalist profits.\\\n",
    "\"\"\"\n",
    "get_cynicism_score(content)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a89b2d33-995c-4a7f-9cc2-9d227448a587",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('dc_weekly_topic_scores.csv')\n",
    "cols_to_keep = ['id', 'content_text', 'derived_link', 'domain', 'month', 'clean_text']\n",
    "df = df[cols_to_keep]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 157,
   "id": "317b8506-a14b-4b23-a6b2-4a8e673e3053",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2137632.pbs02/ipykernel_2927845/2140052567.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n"
     ]
    }
   ],
   "source": [
    "# Function to split dataframe into n chunks\n",
    "def split_dataframe(df, n):\n",
    "    chunks = [df[i::n] for i in range(n)]\n",
    "    return chunks\n",
    "\n",
    "# Assuming df is your DataFrame and get_cynicism_score is defined\n",
    "# Split df into 10 parts\n",
    "df_chunks = split_dataframe(df, 10)\n",
    "\n",
    "# Initialize an empty DataFrame to store results\n",
    "results_df = pd.DataFrame()\n",
    "\n",
    "# Loop through each chunk, apply the function, and concatenate the results\n",
    "for i, chunk in enumerate(df_chunks):\n",
    "    # Apply the get_cynicism_score function\n",
    "    temp_result = chunk['clean_text'].apply(lambda x: get_cynicism_score(x)).apply(pd.Series)\n",
    "    temp_result.columns = ['cynicism_true_logprob', 'cynicism_false_logprob']\n",
    "    \n",
    "    # Combine the results with the original chunk (if needed)\n",
    "    chunk[['cynicism_true_logprob', 'cynicism_false_logprob']] = temp_result\n",
    "    \n",
    "    # Save the intermediate result to a temp CSV file\n",
    "    temp_filename = f'temp_cynicism_scores_part_{i+1}.csv'\n",
    "    chunk.to_csv(temp_filename, index=False)\n",
    "    \n",
    "    # Append the processed chunk to the results DataFrame\n",
    "    results_df = pd.concat([results_df, chunk], ignore_index=True)\n",
    "\n",
    "# After processing all chunks, save the final results to a CSV file\n",
    "results_df.to_csv('dc_weekly_cynicism_scores.csv', index=False)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 166,
   "id": "c4a1a2a2-1268-4ef9-93f7-32205586dc5c",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    10144.000000\n",
       "mean         0.999872\n",
       "std          0.000644\n",
       "min          0.966132\n",
       "25%          0.999954\n",
       "50%          0.999988\n",
       "75%          0.999995\n",
       "max          1.000000\n",
       "dtype: float64"
      ]
     },
     "execution_count": 166,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Do the two prob's generally sum to ~1?\n",
    "sums = pd.Series(np.exp(results_df.cynicism_true_logprob) + np.exp(results_df.cynicism_false_logprob))\n",
    "sums.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 169,
   "id": "76e69b2a-7478-426c-be20-4efa3e525ed7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>content_text</th>\n",
       "      <th>derived_link</th>\n",
       "      <th>domain</th>\n",
       "      <th>month</th>\n",
       "      <th>clean_text</th>\n",
       "      <th>cynicism_true_logprob</th>\n",
       "      <th>cynicism_false_logprob</th>\n",
       "      <th>cynicism_score</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>99187</td>\n",
       "      <td>&lt;p&gt;An unassuming man living in a New Hampshire...</td>\n",
       "      <td>https://www.foxnews.com/us/new-hampshire-man-d...</td>\n",
       "      <td>foxnews.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>An unassuming man living in a New Hampshire mo...</td>\n",
       "      <td>-11.242507</td>\n",
       "      <td>-0.000015</td>\n",
       "      <td>-0.999972</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>99207</td>\n",
       "      <td>&lt;p&gt;Gen Z voters Alexandria Chun, Christian Hod...</td>\n",
       "      <td>https://www.foxnews.com/politics/pro-trump-can...</td>\n",
       "      <td>foxnews.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>Gen Z voters Alexandria Chun, Christian Hodges...</td>\n",
       "      <td>-6.854901</td>\n",
       "      <td>-0.001057</td>\n",
       "      <td>-0.997889</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>99227</td>\n",
       "      <td>&lt;p&gt;In a bid to protect its military factories,...</td>\n",
       "      <td>http://russian_placeholder.com</td>\n",
       "      <td>russian_placeholder.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>In a bid to protect its military factories, Uk...</td>\n",
       "      <td>-4.252183</td>\n",
       "      <td>-0.014343</td>\n",
       "      <td>-0.971527</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>99247</td>\n",
       "      <td>&lt;p&gt;Russian state-owned company, BTRZ, a part o...</td>\n",
       "      <td>http://russian_placeholder.com</td>\n",
       "      <td>russian_placeholder.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>Russian state-owned company, BTRZ, a part of t...</td>\n",
       "      <td>-5.336314</td>\n",
       "      <td>-0.004840</td>\n",
       "      <td>-0.990358</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>99267</td>\n",
       "      <td>&lt;p&gt;Ukrainian President Volodymyr Zelensky has ...</td>\n",
       "      <td>http://russian_placeholder.com</td>\n",
       "      <td>russian_placeholder.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>Ukrainian President Volodymyr Zelensky has mad...</td>\n",
       "      <td>-3.547647</td>\n",
       "      <td>-0.029228</td>\n",
       "      <td>-0.942403</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10139</th>\n",
       "      <td>99097</td>\n",
       "      <td>&lt;p&gt;The Ministry of Defense of the Russian Fede...</td>\n",
       "      <td>http://russian_placeholder.com</td>\n",
       "      <td>russian_placeholder.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>The Ministry of Defense of the Russian Federat...</td>\n",
       "      <td>-2.423782</td>\n",
       "      <td>-0.092773</td>\n",
       "      <td>-0.822815</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10140</th>\n",
       "      <td>99117</td>\n",
       "      <td>&lt;p&gt;A search is underway in Colorado for Hanme ...</td>\n",
       "      <td>https://www.foxnews.com/us/colorado-police-hun...</td>\n",
       "      <td>foxnews.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>A search is underway in Colorado for Hanme K. ...</td>\n",
       "      <td>-6.445502</td>\n",
       "      <td>-0.001593</td>\n",
       "      <td>-0.996821</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10141</th>\n",
       "      <td>99137</td>\n",
       "      <td>&lt;p&gt;Ford Motor Company, a major U.S. automaker,...</td>\n",
       "      <td>https://www.foxnews.com/politics/ford-dramatic...</td>\n",
       "      <td>foxnews.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>Ford Motor Company, a major U.S. automaker, ha...</td>\n",
       "      <td>-0.408198</td>\n",
       "      <td>-1.093183</td>\n",
       "      <td>0.329699</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10142</th>\n",
       "      <td>99157</td>\n",
       "      <td>&lt;p&gt;In a move that has sparked criticism from R...</td>\n",
       "      <td>https://www.foxnews.com/politics/white-house-s...</td>\n",
       "      <td>foxnews.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>In a move that has sparked criticism from Repu...</td>\n",
       "      <td>-0.018730</td>\n",
       "      <td>-3.987114</td>\n",
       "      <td>0.962891</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10143</th>\n",
       "      <td>99177</td>\n",
       "      <td>&lt;p&gt;Leaders of major emerging economies, known ...</td>\n",
       "      <td>http://www.aljazeera.com/placeholder</td>\n",
       "      <td>aljazeera.com</td>\n",
       "      <td>2023-11</td>\n",
       "      <td>Leaders of major emerging economies, known as ...</td>\n",
       "      <td>-5.565266</td>\n",
       "      <td>-0.003844</td>\n",
       "      <td>-0.992335</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>10144 rows × 9 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "          id                                       content_text  \\\n",
       "0      99187  <p>An unassuming man living in a New Hampshire...   \n",
       "1      99207  <p>Gen Z voters Alexandria Chun, Christian Hod...   \n",
       "2      99227  <p>In a bid to protect its military factories,...   \n",
       "3      99247  <p>Russian state-owned company, BTRZ, a part o...   \n",
       "4      99267  <p>Ukrainian President Volodymyr Zelensky has ...   \n",
       "...      ...                                                ...   \n",
       "10139  99097  <p>The Ministry of Defense of the Russian Fede...   \n",
       "10140  99117  <p>A search is underway in Colorado for Hanme ...   \n",
       "10141  99137  <p>Ford Motor Company, a major U.S. automaker,...   \n",
       "10142  99157  <p>In a move that has sparked criticism from R...   \n",
       "10143  99177  <p>Leaders of major emerging economies, known ...   \n",
       "\n",
       "                                            derived_link  \\\n",
       "0      https://www.foxnews.com/us/new-hampshire-man-d...   \n",
       "1      https://www.foxnews.com/politics/pro-trump-can...   \n",
       "2                         http://russian_placeholder.com   \n",
       "3                         http://russian_placeholder.com   \n",
       "4                         http://russian_placeholder.com   \n",
       "...                                                  ...   \n",
       "10139                     http://russian_placeholder.com   \n",
       "10140  https://www.foxnews.com/us/colorado-police-hun...   \n",
       "10141  https://www.foxnews.com/politics/ford-dramatic...   \n",
       "10142  https://www.foxnews.com/politics/white-house-s...   \n",
       "10143               http://www.aljazeera.com/placeholder   \n",
       "\n",
       "                        domain    month  \\\n",
       "0                  foxnews.com  2023-11   \n",
       "1                  foxnews.com  2023-11   \n",
       "2      russian_placeholder.com  2023-11   \n",
       "3      russian_placeholder.com  2023-11   \n",
       "4      russian_placeholder.com  2023-11   \n",
       "...                        ...      ...   \n",
       "10139  russian_placeholder.com  2023-11   \n",
       "10140              foxnews.com  2023-11   \n",
       "10141              foxnews.com  2023-11   \n",
       "10142              foxnews.com  2023-11   \n",
       "10143            aljazeera.com  2023-11   \n",
       "\n",
       "                                              clean_text  \\\n",
       "0      An unassuming man living in a New Hampshire mo...   \n",
       "1      Gen Z voters Alexandria Chun, Christian Hodges...   \n",
       "2      In a bid to protect its military factories, Uk...   \n",
       "3      Russian state-owned company, BTRZ, a part of t...   \n",
       "4      Ukrainian President Volodymyr Zelensky has mad...   \n",
       "...                                                  ...   \n",
       "10139  The Ministry of Defense of the Russian Federat...   \n",
       "10140  A search is underway in Colorado for Hanme K. ...   \n",
       "10141  Ford Motor Company, a major U.S. automaker, ha...   \n",
       "10142  In a move that has sparked criticism from Repu...   \n",
       "10143  Leaders of major emerging economies, known as ...   \n",
       "\n",
       "       cynicism_true_logprob  cynicism_false_logprob  cynicism_score  \n",
       "0                 -11.242507               -0.000015       -0.999972  \n",
       "1                  -6.854901               -0.001057       -0.997889  \n",
       "2                  -4.252183               -0.014343       -0.971527  \n",
       "3                  -5.336314               -0.004840       -0.990358  \n",
       "4                  -3.547647               -0.029228       -0.942403  \n",
       "...                      ...                     ...             ...  \n",
       "10139              -2.423782               -0.092773       -0.822815  \n",
       "10140              -6.445502               -0.001593       -0.996821  \n",
       "10141              -0.408198               -1.093183        0.329699  \n",
       "10142              -0.018730               -3.987114        0.962891  \n",
       "10143              -5.565266               -0.003844       -0.992335  \n",
       "\n",
       "[10144 rows x 9 columns]"
      ]
     },
     "execution_count": 169,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "results_df['cynicism_score'] = np.exp(results_df.cynicism_true_logprob) - np.exp(results_df.cynicism_false_logprob)\n",
    "results_df"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a80e4655-89fe-406f-a11c-a74ca49ab371",
   "metadata": {},
   "source": [
    "## Get cynicism specifically w/r/t US and NATO "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "b8e9e8b1-c24f-4390-bc09-760dc434260c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "False\n",
      "Cynicism score: -0.17687793891421272\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(-0.8878025, -0.53028524)"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "example = \"\"\"\\\n",
    "\"In the grand theater of US politics, where every actor plays their part with a smile wide enough to hide the sharpened knives of ambition, \\\n",
    "promises are as fleeting as the applause, \\\n",
    "and the common good a quaint concept trotted out for festive speeches and quickly forgotten amidst the backroom deals and power plays.\n",
    "\"\"\"\n",
    "\n",
    "sys_prompt = \"\"\"\\\n",
    "You are a helpful assistant specializing in classifying texts with respect to tone. \\\n",
    "The user inputs a piece of text which may or may not express cynicism concerning the US government, US politicians, and/or NATO. \\\n",
    "If it does express cynicism regarding any of those entities, you respond with \"True\"; otherwise, you respond with \"False\".\\\n",
    "\"\"\"\n",
    "\n",
    "get_cynicism_score('Toronto politicians and their government are using NAFTA to line their pockets.', sys_prompt=sys_prompt, example=example, example_tv='True', verbose=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "id": "d551ffec-5ca2-4338-b9f0-f6bcb5b2c0e9",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
      "/local_scratch/pbs.2145466.pbs02/ipykernel_432837/1389151016.py:20: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame.\n",
      "Try using .loc[row_indexer,col_indexer] = value instead\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n"
     ]
    }
   ],
   "source": [
    "# Function to split dataframe into n chunks\n",
    "def split_dataframe(df, n):\n",
    "    chunks = [df[i::n] for i in range(n)]\n",
    "    return chunks\n",
    "\n",
    "# Assuming df is your DataFrame and get_cynicism_score is defined\n",
    "# Split df into 10 parts\n",
    "df_chunks = split_dataframe(df, 10)\n",
    "\n",
    "# Initialize an empty DataFrame to store results\n",
    "results_df = pd.DataFrame()\n",
    "\n",
    "# Loop through each chunk, apply the function, and concatenate the results\n",
    "for i, chunk in enumerate(df_chunks):\n",
    "    # Apply the get_cynicism_score function\n",
    "    temp_result = chunk['clean_text'].apply(lambda x: get_cynicism_score(x, sys_prompt=sys_prompt, example=example, example_tv='True')).apply(pd.Series)\n",
    "    temp_result.columns = ['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']\n",
    "    \n",
    "    # Combine the results with the original chunk (if needed)\n",
    "    chunk[['cynicism_usnato_true_logprob', 'cynicism_usnato_false_logprob']] = temp_result\n",
    "    \n",
    "    # Save the intermediate result to a temp CSV file\n",
    "    temp_filename = f'temp_cynicism_scores_part_{i+1}.csv'\n",
    "    chunk.to_csv(temp_filename, index=False)\n",
    "    \n",
    "    # Append the processed chunk to the results DataFrame\n",
    "    results_df = pd.concat([results_df, chunk], ignore_index=True)\n",
    "\n",
    "# After processing all chunks, save the final results to a CSV file\n",
    "results_df.to_csv('dc_weekly_cynicism_usnato_scores.csv', index=False)\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "catching_trolls",
   "language": "python",
   "name": "catching_trolls"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
